################
#### replication code for Nathan, 2025, ``Explaining Urban Order," World Politics
#### this file replicates all analyses in the *SUPPLEMENTARY MATERIAL*
#### see "maintext.R" for the replication code for all analyses in the *MAIN TEXT*
#### 
#### note: this code was written under R version 4.2.1 and uses deprecated GIS packages (maptools, sp, rgeos, raster) that will only run in older versions of R. these packages only became deprecated during the life cycle of this research project. you must install an earlier R version to run the full code.
###############



#clear workspace:
rm(list=ls())

#set working directory to replication data folder:
setwd("~/Dropbox/UMProjects_20172018/UrbanEnvironment/code/crossnational_cities/replication_data/")

# load packages:
library(foreign)
library(RCurl) 
library(AER)
library(sandwich)
library(calibrate)
library(data.table)
library(MASS)
library(lmtest)
library(car)
library(lmerTest)
library(mgcv)
library(sp)
library(maptools)
library(rgeos)
library(sf)
library(rgdal)
library(igraph)
library(raster)
library(stringr)
library(arm)

#########################
### load data 
#########################

load("./data_topost/main_analysis_dat.Rdata")
	#this includes one object "main2"
	#this is the neighborhood (t-community) level dataset for the 36 sample cities
	#used in the main analyses

sub1 <- main2[main2$colrule == 0 & main2$average_age >= 1950,]
dim(sub1)

sub1 $auto_med <- 0
sub1 $auto_med[sub1 $v2x_polyarchy <= median(sub1$v2x_polyarchy)] <- 1 
median(sub1$v2x_polyarchy)


load("./data_topost/citylevel.Rdata")
	#this includes one object "d"
	#this is the city-level data for the largest 1001 cities in Sub-Saharan Africa for which Boeing's (2021) data
	#does not have substantial missingness
	#the file is based on Boeing (2021) and extends it with additional columns

load("./data_topost/housing.Rdata")
	#this includes two objects: base and housing
	#housing is the time series dataset on state housing consturction in 22 cities
	#base is a matrix listing the within case sign on the diretion of main effect for this subsample of cities, based on Figure SI.5

load("./data_topost/timedat.Rdata")
	#contains a series of city-specific time series datasets of V-DEM and Geddes et al democracy measures by year
	#saved in format timedat.accra, timedat.abuja, etc. There is one for each city in the sample. 

audit <- read.csv(file="./data_topost/audit_NNEDIT.csv", stringsAsFactors=FALSE, header=TRUE)
	#manually entered notes for the audit of gridded neighborhoods desribed in the SI.

vdem <- read.csv("./data_topost/V-Dem-CY-Core-v11.1.csv")
	#vdem version 11.1 as downloaded from vdem website


#########################
### make robust standard errors function ####
#########################

summary.lm <- function (object, correlation = FALSE, 
                        symbolic.cor = FALSE, robust=FALSE,
                        cluster=c(NULL,NULL),...) {
  # add extension for robust standard errors
  if(robust==TRUE){ 
    # save variable that are necessary to calcualte robust sd
    X <- model.matrix(object)
    u2 <- residuals(object)^2
    XDX <- 0
    
    ## One needs to calculate X'DX. But due to the fact that
    ## D is huge (NxN), it is better to do it with a cycle.
    for(i in 1:nrow(X)) {
      XDX <- XDX + u2[i]*X[i,]%*%t(X[i,])
    }
    
    # inverse(X'X)
    XX1 <- solve(t(X)%*%X,tol = 1e-100)
    
    # Sandwich Variance calculation (Bread x meat x Bread)
    varcovar <- XX1 %*% XDX %*% XX1
    
    # adjust degrees of freedom 
    dfc_r <- sqrt(nrow(X))/sqrt(nrow(X)-ncol(X))
    
    # Standard errors of the coefficient estimates are the
    # square roots of the diagonal elements
    rstdh <- dfc_r*sqrt(diag(varcovar))
  }
  # add extension for clustered standard errors
  if(!is.null(cluster)&robust==T){warning("Robust standard errors are calculated. Set robust=F to calculate clustered standard errors.")}
  if(!is.null(cluster)&robust==F){
    if(""%in%cluster){stop("No variable for clustering provided.")}
    if(length(cluster)>2){stop("The function only allows max. 2 clusters. You provided more.")}
    n_coef <- all.vars(object$call$formula)
    if(length(cluster)==1){
      dat <- na.omit(eval(object$call$data)[,c(n_coef,cluster)])
      if(nrow(dat)<nrow(object$model)){stop("Not all observation have a cluster.")}
      cluster_vector <- dat[,cluster]
      require(sandwich, quietly = TRUE)
      M <- res_length <- length(unique(cluster_vector))
      N <- length(cluster_vector)
      K <- object$rank
      dfc <- (M/(M-1))*((N-1)/(N-K))
      uj  <- na.omit(apply(estfun(object),2, function(x) tapply(x, cluster_vector, sum)));
      varcovar <- dfc*sandwich(object, meat=crossprod(uj)/N)
      rstdh <- sqrt(diag(varcovar))
    } 
    if(length(cluster)==2){
      dat_1 <- na.omit(eval(object$call$data)[,c(n_coef,cluster[1])])
      if(nrow(dat_1)<nrow(object$model)){stop("Not all observation have a cluster.")}
      dat_2 <- na.omit(eval(object$call$data)[,c(n_coef,cluster[2])])
      if(nrow(dat_2)<nrow(object$model)){stop("Not all observation have a cluster.")}
      
      dat <- na.omit(eval(object$call$data)[,c(n_coef,cluster)])
      library(sandwich,quietly = TRUE)
      cluster1 <- dat[,cluster[1]]
      cluster2 <- dat[,cluster[2]]
      cluster12 = paste(cluster1,cluster2, sep="")
      M1  <- length(unique(cluster1))
      M2  <- length(unique(cluster2))   
      M12 <- res_length <-length(unique(cluster12))
      N   <- length(cluster1)          
      K   <- object$rank             
      dfc1  <- (M1/(M1-1))*((N-1)/(N-K))  
      dfc2  <- (M2/(M2-1))*((N-1)/(N-K))  
      dfc12 <- (M12/(M12-1))*((N-1)/(N-K))  
      u1j   <- apply(estfun(object), 2, function(x) tapply(x, cluster1,  sum)) 
      u2j   <- apply(estfun(object), 2, function(x) tapply(x, cluster2,  sum)) 
      u12j  <- apply(estfun(object), 2, function(x) tapply(x, cluster12, sum)) 
      vc1   <-  dfc1*sandwich(object, meat=crossprod(u1j)/N )
      vc2   <-  dfc2*sandwich(object, meat=crossprod(u2j)/N )
      vc12  <- dfc12*sandwich(object, meat=crossprod(u12j)/N)
      varcovar <- vc1 + vc2 - vc12
      rstdh <- sqrt(diag(varcovar))
    } 
    
  }
  z <- object
  p <- z$rank
  rdf <- z$df.residual
  if (p == 0) {
    r <- z$residuals
    n <- length(r)
    w <- z$weights
    if (is.null(w)) {
      rss <- sum(r^2)
    }
    else {
      rss <- sum(w * r^2)
      r <- sqrt(w) * r
    }
    resvar <- rss/rdf
    ans <- z[c("call", "terms", if (!is.null(z$weights)) "weights")]
    class(ans) <- "summary.lm"
    ans$aliased <- is.na(coef(object))
    ans$residuals <- r
    ans$df <- c(0L, n, length(ans$aliased))
    ans$coefficients <- matrix(NA, 0L, 4L)
    dimnames(ans$coefficients) <- list(NULL, c("Estimate", 
                                               "Std. Error", "t value", "Pr(>|t|)"))
    ans$sigma <- sqrt(resvar)
    ans$r.squared <- ans$adj.r.squared <- 0
    return(ans)
  }
  if (is.null(z$terms)) 
    stop("invalid 'lm' object:  no 'terms' component")
  if (!inherits(object, "lm")) 
    warning("calling summary.lm(<fake-lm-object>) ...")
  Qr <- stats:::qr.lm(object)
  n <- NROW(Qr$qr)
  if (is.na(z$df.residual) || n - p != z$df.residual) 
    warning("residual degrees of freedom in object suggest this is not an \"lm\" fit")
  r <- z$residuals
  f <- z$fitted.values
  w <- z$weights
  if (is.null(w)) {
    mss <- if (attr(z$terms, "intercept")) 
      sum((f - mean(f))^2)
    else sum(f^2)
    rss <- sum(r^2)
  }
  else {
    mss <- if (attr(z$terms, "intercept")) {
      m <- sum(w * f/sum(w))
      sum(w * (f - m)^2)
    }
    else sum(w * f^2)
    rss <- sum(w * r^2)
    r <- sqrt(w) * r
  }
  resvar <- rss/rdf
  if (is.finite(resvar) && resvar < (mean(f)^2 + var(f)) * 
      1e-30) 
    warning("essentially perfect fit: summary may be unreliable")
  p1 <- 1L:p
  R <- chol2inv(Qr$qr[p1, p1, drop = FALSE])
  se <- sqrt(diag(R) * resvar)
  
  if(robust==T){se <- rstdh}
  if(!is.null(cluster)&robust==F){se <- rstdh}
  est <- z$coefficients[Qr$pivot[p1]]
  tval <- est/se
  ans <- z[c("call", "terms", if (!is.null(z$weights)) "weights")]
  ans$residuals <- r
  pval <- 2 * pt(abs(tval), 
                 rdf, lower.tail = FALSE)
  ans$coefficients <- cbind(est, se, tval, pval)
  dimnames(ans$coefficients) <- list(names(z$coefficients)[Qr$pivot[p1]], 
                                     c("Estimate", "Std. Error", "t value", "Pr(>|t|)"))
  ans$aliased <- is.na(coef(object))
  ans$sigma <- sqrt(resvar)
  ans$df <- c(p, rdf, NCOL(Qr$qr))
  if (p != attr(z$terms, "intercept")) {
    df.int <- if (attr(z$terms, "intercept")) 
      1L
    else 0L
    ans$r.squared <- mss/(mss + rss)
    ans$adj.r.squared <- 1 - (1 - ans$r.squared) * ((n - 
                                                       df.int)/rdf)
    ans$fstatistic <- c(value = (mss/(p - df.int))/resvar, 
                        numdf = p - df.int, dendf = rdf)
    if(robust==T|(!is.null(cluster))){
      if(!is.null(cluster)){rdf <- res_length -1}
      pos_coef <- match(names(z$coefficients)[-match("(Intercept)",
                                                     names(z$coefficients))],
                        names(z$coefficients))
      
      P_m <- matrix(z$coefficients[pos_coef])
      
      R_m <- diag(1, 
                  length(pos_coef), 
                  length(pos_coef))
      
      ans$fstatistic <- c(value = t(R_m%*%P_m)%*%
                            (solve(varcovar[pos_coef,pos_coef],tol = 1e-100))%*%
                            (R_m%*%P_m)/(p - df.int), 
                          numdf = p - df.int, dendf = rdf)
      
    }
    
  }
  else ans$r.squared <- ans$adj.r.squared <- 0
  ans$cov.unscaled <- R
  dimnames(ans$cov.unscaled) <- dimnames(ans$coefficients)[c(1, 
                                                             1)]
  if (correlation) {
    ans$correlation <- (R * resvar)/outer(se, se)
    dimnames(ans$correlation) <- dimnames(ans$cov.unscaled)
    ans$symbolic.cor <- symbolic.cor
  }
  if (!is.null(z$na.action)) 
    ans$na.action <- z$na.action
  class(ans) <- "summary.lm"
  ans
}



	
	
########################################
#### Table SI.1 : City level predictors of orientation order (regression table)
########################################


d$popdens1000 <- d$popdens/10000

m1 <- lm(orientation_order ~ popdens1000 + resident_pop + elev_median  + elev_std + rain_mm + soil_quality_FAO + ocean_coast + border_city  + precolonial_major_city + nat_capital_currently + prov_capital_currently + as.factor(country_iso), data=d)
summary(m1)
round(summary(m1)$coefficients, digits=3)
length(m1 $residual)

m2 <- lmer(orientation_order ~ popdens1000 + resident_pop  + elev_median  + elev_std + rain_mm + soil_quality_FAO + ocean_coast + border_city + precolonial_major_city + nat_capital_currently + prov_capital_currently + settler_colony + colonial_power_britain + colonial_power_other + colonial_power_none +  (1 | country_iso), data=d)
summary(m2)
	#so colonial power france is hte omitted category
 coef <- summary(m2)$coefficients
 coef <- as.data.frame(coef)
round(coef, digits=3)
#length(m2$residuals)


########################################
#### Table SI.2: Selected cities vs. population of large African cities (t-tests) 
########################################

d$city <- paste(d$core_city, d$uc_id, sep="-")

table(d$country)
#calculate the largest *2* cities in each country: to increase odds we have historical materials on them
new2 <- d[d$core_city=="accra",]
for(i in 1:length(unique(d$country))){
	new <- d[d$country == unique(d$country)[i],]
	new <- new[order(new$resident_pop, decreasing=T),]
	if(nrow(new)>=3){
	new2 <- rbind(new2, new[1:2,])
	} else{
	new2 <- rbind(new2, new)
	}
}
dim(new2)

d$in_frame <- 0
d$in_frame[d$uc_id %in% new2$uc_id | d$nat_capital_currently==1 | d$resident_pop > 200000] <- 1
table(d$in_frame)
	#321 eligible cities

d$in_sample <- 0
d$in_sample[d$city %in% main2$city] <- 1
table(d$in_sample)
	#37 because abuja is counted 2x... 

table(d$city[d$in_sample==1] %in% d$city[d$in_frame==1])

vars <- c("orientation_order", "resident_pop", "popdens", "elev_std", "elev_median", "precolonial_major_city", "colonial_power_britain", "colonial_power_france", "settler_colony", "border_city", "rain_mm", "soil_quality_FAO", "ocean_coast")

holder <- as.data.frame(matrix(NA, nrow=length(vars), ncol=5))
colnames(holder) <- c("var", "mean_frame", "mean_sample", "diff", "p")

for(i in 1:length(vars)){
	holder[i,1] <- vars[i]
	holder[i,2] <- mean(d[d$in_frame==1, vars[i]], na.rm=T)
	holder[i,3] <- mean(d[d$in_sample==1, vars[i]], na.rm=T)
	holder[i,4] <-  mean(d[d$in_sample==1, vars[i]], na.rm=T) - mean(d[d$in_frame==1, vars[i]], na.rm=T)
	holder[i,5] <- t.test(d[d$in_frame==1, vars[i]], d[d$in_sample==1, vars[i]])$p.value
}

cbind(holder[,1], round(holder[,2:5], digits=3))


########################################
#### Table SI.1
########################################

#The underlying code for the information in this table is the final object (bands) created in the separate file: "subcity_tcommunities.R"
#The table was made manually, adapting from that "bands" object. There is not code to automatically generate the table. 

########################################
#### Figure SI.1
########################################

#The image files to make Figure SI.1 are in "./data_topost/figureSI1_files/"
#panel (a): 1975 map of Nakuru is from an aerial survey map located in the colletions at the Library of Congress
#panel (b): is a screen shot of the OpenStreetMap baselayer in QGIS taken manually when zoomed to that same approximate location. there is no replication code to reproduce this image. 

########################################
#### Auditing dates of gridded neighborhoods and Figure SI.2
########################################

#After dropping 22 neighborhoods in the 100 for which the historical imagery is not sufficiently high resolution for me to observe the layout at the time the neighborhood first appears, I can confirm my assumption of path dependence broadly holds in the remaining 78 gridded neighborhoods: 91\% (71) have had the same gridded layout that they have today throughout their existence, suggesting there is no measurement error in attributing the present-day layout to the regime type as of their initial construction. Among the other 7 (9\%), 3 (4\%) were redeveloped to become gridded in a later period than their initial construction, but the country's regime type was the same in both periods, so the measurement error does not affect my coding of the explanatory variable and would not change my results. Only in 4 (5\%) cases -- two neighborhoods in Cape Town, one in Johannesburg, and one Addis Ababa -- did the current gridded layout emerge at a later date from the initial construction under a regime with a polyarchy score sufficiently different to have changed whether the period is coded as a ``high autocracy."

head(audit)

table(audit$persistent)
#-99   0   1 
# 22   7  71 

7 / 78
#among the ones where I can actually observe it closely its 9%


#how many have actual changes in the high autocracy variable b/w when really built and when i coded them as built? 
audit[audit$persistent==0, c("uniqueid", "actual_year")]
#                     uniqueid actual_year
#11  addis_ababa-5134_tcom_392   1992-2000
#32   cape_town-3268_tcom_1032   1992-2005
#33   cape_town-3268_tcom_1063        2020
#42 johannesburg-3673_tcom_660   1985-2003
#64     khartoum-4335_tcom_375   2008-2009
#69     khartoum-4335_tcom_451        2009
#80     khartoum-4335_tcom_522   2008-2009

sub1$v2x_polyarchy[sub1$uniqueid == "addis_ababa-5134_tcom_392"]
#0.1653822
mean(vdem$v2x_polyarchy[vdem$country_name=="Ethiopia" & vdem$year >= 1992 & vdem$year<=2000])
#0.2351111
	#this is a change in the high autocracy variable (it goes from below to above)

sub1$v2x_polyarchy[sub1$uniqueid == "cape_town-3268_tcom_1032"]
#0.1738
mean(vdem$v2x_polyarchy[vdem$country_name=="South Africa" & vdem$year >= 1992 & vdem$year<=2005])
#0.6374286
	#this is also a change

sub1$v2x_polyarchy[sub1$uniqueid == "cape_town-3268_tcom_1063"]
#0.1738
mean(vdem$v2x_polyarchy[vdem$country_name=="South Africa" & vdem$year ==2020])
#0.701
	#this is also a change

sub1$v2x_polyarchy[sub1$uniqueid == "johannesburg-3673_tcom_660"]
#0.1818
mean(vdem$v2x_polyarchy[vdem$country_name=="South Africa" & vdem$year >= 1985 & vdem$year<=2003])
#0.4604737
	#this is also a change

sub1$v2x_polyarchy[sub1$uniqueid == "khartoum-4335_tcom_375"]
#0.1889742
mean(vdem$v2x_polyarchy[vdem$country_name=="Sudan" & vdem$year >= 2008 & vdem$year<=2009])
#0.2115
	#high autocracy both times

sub1$v2x_polyarchy[sub1$uniqueid == "khartoum-4335_tcom_451"]
#0.144891
mean(vdem$v2x_polyarchy[vdem$country_name=="Sudan" & vdem$year ==2009])
#0.212
	#high autocracy both times

sub1$v2x_polyarchy[sub1$uniqueid == "khartoum-4335_tcom_522"]
#0.1961739
mean(vdem$v2x_polyarchy[vdem$country_name=="Sudan" & vdem$year >= 2008 & vdem$year<=2009])
#0.2115
	#high autocracy both times

#I then conduct a simulation analysis to assess how much of a risk this level of measurement error could pose. Applying the logic of randomization inference, across 1000 simulations I switch the high autocracy (explanatory variable) coding (0 vs.~1) for a randomly sampled 5\% of highly-gridded neighborhoods (upper 80th percentile in griddedness) and re-estimate the main model in Table 1, panel (a), column 4.  This test operates under the assumption that measurement error of this degree means that an unknown 5\% of these observations have explanatory variable values potentially miscoded. 
#Figure \ref{audit} plots the distribution of resulting effect estimates from each of the 1000 runs. The effect for autocracy remains positive in 100\% of cases. It is statistically significant at the $\alpha<0.1$ level in 100\%, and at the $\alpha<0.05$ level in 88\%.  My observed effect size is also not an outlier. Ultimately, the true level of measurement error would have to be significantly larger than this for it to explain away my main result. 

fe4 <- lm(orientation_order ~ auto_med + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)
summary(fe4, cluster=c("country"))

quantile(sub1$orientation_order, c(0.2, 0.4, 0.6, 0.8))

#### set up simulation loop... do it 1000 times

N <- 1000
holder <- as.data.frame(matrix(NA, nrow=N+1, ncol=3))
colnames(holder) <- c("est", "sig_05", "sig_10")
holder[1,1] <- summary(fe4, cluster=c("country"))$coefficients[2,1]
holder[1,2] <- summary(fe4, cluster=c("country"))$coefficients[2,4] < 0.05
holder[1,3] <- summary(fe4, cluster=c("country"))$coefficients[2,4] < 0.1

ids <- sub1$uniqueid[sub1$orientation_order>= 0.8318412]
error_rate <- 0.051
switch_N <- round(error_rate * length(ids), digits=0)
switch_N
#57 #change 57 codings each time of the 1109 very gridded neighborhoods

set.seed(02142)
for(i in 2:(N+1)){
	samp1 <- sample(ids, switch_N, replace=FALSE)
	sub1$auto_med_ALT <- sub1$auto_med
	sub1$auto_med_ALT[sub1$uniqueid %in% samp1] <- 1 - sub1$auto_med[sub1$uniqueid %in% samp1] #switch the IV codings for these
	fenew <- lm(orientation_order ~ auto_med_ALT + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)
	holder[i,1] <- summary(fenew, cluster=c("country"))$coefficients[2,1]
	holder[i,2] <- summary(fenew, cluster=c("country"))$coefficients[2,4] < 0.05
	holder[i,3] <- summary(fenew, cluster=c("country"))$coefficients[2,4] < 0.1
	
}

summary(holder$est[2:(N+1)])
#   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#0.02213 0.02779 0.02937 0.02941 0.03110 0.03739 

mean(holder$sig_05[2:(N+1)])
#0.876
	#88% would be positive effect estimates at the alpha < 0.05 level

mean(holder$sig_10[2:(N+1)])
#[1] 1
	#all of them would be positive effects at the alpha < 0.1 level

#its very likely that hte positive relationship cannot be just due to measurement error of this form

pdf(file="./figs/figureSI2.pdf", height=5, width=9)
hist(holder$est[2:(N+1)], xlim=c(-0.01, 0.04), breaks=30, col="grey77", main="Simulated re-estimates\nTable 1, Column 4", xlab="coefficient on High Autocracy", cex.lab=0.88, cex.main=0.88, ylab="frequency")
hist(holder$est[2:(N+1)][holder$sig_05[2:(N+1)]==TRUE], xlim=c(-0.01, 0.04), breaks=30, col="grey45", add=T)
abline(v=holder$est[1], col="firebrick", lwd=1.6)
abline(v=0, col="black", lwd=1.2, lty="dashed")
text(0.0342, 68, labels="original\nestimate", cex=0.65, col="firebrick")
legend(0.0035, 75, legend=c("significant <0.05", "significant <0.1"), fill=c("grey45", "grey77"))

dev.off()



########################################
#### Table SI.4: Robustness to focus on post-1980 streets 
########################################


fe4 <- lm(orientation_order ~ auto_med + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)
summary(fe4, cluster=c("country"))

fe4ALT <- lm(orientation_order ~ auto_med + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1[sub1$average_age >= 1980,])
summary(fe4ALT, cluster=c("country"))

round(summary(fe4, cluster=c("country"))$coefficients, digits=3)
length(fe4$residual)
round(summary(fe4, cluster=c("country"))$adj.r.squared, digits=3)

round(summary(fe4ALT, cluster=c("country"))$coefficients, digits=3)
length(fe4ALT $residual)
round(summary(fe4ALT, cluster=c("country"))$adj.r.squared, digits=3)


########################################
#### Table SI.5: Robustness to using Geddes et al. (2014) democracy definition 
########################################

fe4 <- lm(orientation_order ~ gwf_democracy_extended + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)
summary(fe4, cluster=c("country"))

lmer4 <- lmer(orientation_order ~ gwf_democracy_extended  + node_count + elev_median + elev_std + city_elev_std + nat_capital_historically + ocean_coast + border_city + rain_mm + soil_quality_FAO + precolonial_major_city + colonial_power_britain + colonial_power_france + colonial_power_none + settler_colony + (1 | city_fixed), data= sub1)
summary(lmer4)

lm4 <- lm(orientation_order ~ gwf_democracy_extended  + node_count + elev_median + elev_std + city_elev_std + nat_capital_historically + ocean_coast + border_city + rain_mm + soil_quality_FAO + precolonial_major_city + colonial_power_britain + colonial_power_france + colonial_power_none + settler_colony, data= sub1)
summary(lm4, cluster=c("country"))
length(lm4$residuals)


########################################
#### Figure SI.3: Non-linear relationship between polyarchy and griddedness: GAM model. 
########################################

set.seed(02139)
gam1 <- gam(orientation_order ~ s(v2x_polyarchy, k=8) + node_count + elev_median + elev_std + as.factor(city_fixed), family=gaussian(link=log), data=sub1, sp=0.2)
summary(gam1)
gam.check(gam1)

dens <- density(sub1$v2x_polyarchy)
dens$y <- (dens$y / 150) - 0.155

pdf(file="./figs/figureSI3.pdf", height=8, width=8) 
plot(gam1, scheme =0, rug=F, seWithMean=T, ylim=c(-0.15, 0.05), ylab="Partial of Avg. Polyarchy", xlab="Avg. Polyarchy")
lines(dens, pch=16, cex=0.5, col="grey33")
polygon(dens, col="grey33")
abline(v=median(sub1$v2x_polyarchy), col="red", lwd=2)
dev.off()



########################################
#### Table SI.6: List of “high autocracy” city-years  
########################################


timenames <- list(timedat.abuja, timedat.accra, timedat.addis_ababa, timedat.arusha, timedat.bamako, timedat.beira, timedat.brikama, timedat.buchanan, timedat.bujumbura, timedat.cape_town, timedat.gaborone, timedat.gombe, timedat.ibadan, timedat.johannesburg, timedat.kampala, timedat.kara, timedat.khartoum, timedat.kigali, timedat.kinshasa, timedat.lagos, timedat.luanda, timedat.lubumbashi, timedat.malabo, timedat.manzini, timedat.maputo, timedat.maseru, timedat.monrovia, timedat.nairobi, timedat.nakuru, timedat.ndola, timedat.oyo, timedat.pointe_noire, timedat.port_elizabeth, timedat.porto_novo, timedat.touba, timedat.windhoek)

high_auts <- as.data.frame(matrix(NA, nrow=0, ncol=4))
colnames(high_auts) <- c("city_fixed", "country", "year", "v2x_polyarchy")
for(i in 1:36){
	l1 <- timenames[[i]]
	l1 <- l1[l1$v2x_polyarchy <= 0.2321786 & l1$colrule==0 & l1$year > 1950 & is.na(l1$v2x_polyarchy)==FALSE ,c("city_fixed", "country", "year", "v2x_polyarchy")]
	if(nrow(l1)>0){
	high_auts <- rbind(high_auts, l1)	
	}
}

high_auts

high_auts$continues_above <- 0
for(i in 1:nrow(high_auts)){
	if(i > 1){
	high_auts$continues_above[i] <- ifelse(high_auts$city_fixed[i]==high_auts$city_fixed[i-1] & high_auts$year[i]== (high_auts$year[i-1] +1), 1, 0)
	}
}
#write.csv(high_auts, file="./code/crossnational_cities/replication_data/data_topost/high_auts_table_temp.csv")

	#i go through here and check when each *unique* autocratic regime started per country rather than coding all high autocracy years as a single regime	
high_auts <- read.csv(file="./data_topost/high_auts_table_temp_NNEDIT.csv", header=TRUE, stringsAsFactors=FALSE)
	
unique(high_auts$regime_num)
length(unique(high_auts$regime_num))	

high_auts.collapse <- as.data.frame(matrix(NA, nrow=54, ncol=3))
colnames(high_auts.collapse) <- c("city", "years", "mean polyrachy")

for(i in 1:54){
	high_auts2 <- high_auts[high_auts$regime_num == unique(high_auts$regime_num)[i],]
	high_auts.collapse[i, 1] <- str_to_title(paste(strsplit(high_auts2$city_fixed[1], "-")[[1]][1], high_auts2$country[1], sep=", "))
	high_auts.collapse[i, 2] <- paste(min(high_auts2$year), max(high_auts2$year), sep="-")
	high_auts.collapse[i, 3] <- mean(high_auts2$v2x_polyarchy)
}	

high_auts.collapse


########################################
#### Figure SI.4: Interaction of precolonial urbanization and neighborhood age   
########################################

table(main2$city_fixed, main2$precolonial_major_city)
#9 were already precolonial cities

m1 <- lmer(orientation_order ~ (average_age * precolonial_major_city) + v2x_polyarchy + colrule + colsettler + average_age + node_count + elev_median + elev_std + city_elev_std + nat_capital_historically + ocean_coast + border_city + rain_mm + soil_quality_FAO + precolonial_major_city + colonial_power_britain + colonial_power_france + settler_colony + (1 | city_fixed), data=main2)
summary(m1)

set.seed(02142)
	locs <- unique(main2$city_fixed)
	N <- 1000
	s <- sim(m1, N)
	r <- slot(s, "ranef")
	f <- slot(s, "fixef")

perdev<-seq(1737,2018,by=2)
length(perdev)

ethchange <- 1

pred.all<-matrix(data=NA,ncol=length(perdev),nrow=4)
for(j in 1:length(perdev)){
tprob.all1 <- matrix(NA, nrow=N, ncol=1)
cprob.all1 <- matrix(NA, nrow=N, ncol=1)
for(i in 1:length(locs)){

dnew <- main2[main2$city_fixed==locs[i] ,] 
	if(nrow(dnew[dnew$average_age > perdev[j]-2 & dnew$average_age <= perdev[j],]) > 0) {
	dnew2 <- dnew[dnew$average_age > perdev[j]-2 & dnew$average_age <= perdev[j],]	
	betas <- f 	
	betas[,"(Intercept)"] <- f[,"(Intercept)"] + r$city_fixed[,toString(locs[i]),]
	xt.modelB <- cbind(1, dnew2$average_age, 1, dnew2$v2x_polyarchy, dnew2$colrule, dnew2$colsettler, dnew2$node_count, dnew2$elev_median, dnew2$elev_std, dnew2$city_elev_std, dnew2$nat_capital_historically, dnew2$ocean_coast, dnew2$border_city, dnew2$rain_mm, dnew2$soil_quality_FAO, dnew2$colonial_power_britain, dnew2$colonial_power_france, dnew2$settler_colony, dnew2$average_age)
	xc.modelB <- cbind(1, dnew2$average_age, 0, dnew2$v2x_polyarchy, dnew2$colrule, dnew2$colsettler, dnew2$node_count, dnew2$elev_median, dnew2$elev_std, dnew2$city_elev_std, dnew2$nat_capital_historically, dnew2$ocean_coast, dnew2$border_city, dnew2$rain_mm, dnew2$soil_quality_FAO, dnew2$colonial_power_britain, dnew2$colonial_power_france, dnew2$settler_colony, 0)

tprob1<-apply((1/(1+exp(-(as.matrix(xt.modelB))%*% t(betas)))),1,as.vector)
cprob1<-apply((1/(1+exp(-(as.matrix(xc.modelB))%*% t(betas)))),1,as.vector)
tprob.all1 <- cbind(tprob.all1, tprob1)
cprob.all1 <- cbind(cprob.all1, cprob1)	
	} else {
	 tprob.all1 <-  tprob.all1
	 cprob.all1 <- cprob.all1
		}
	}

if(ncol(tprob.all1)>2){
tprob.all1 <- tprob.all1[,-1]
cprob.all1 <- cprob.all1[,-1]
tprobA <- apply(tprob.all1,1,mean)
cprobA <- apply(cprob.all1,1,mean)
diffprob1<-tprobA - cprobA
	}else{
		tprob.all1 <- tprob.all1[,-1]
		cprob.all1 <- cprob.all1[,-1]
		diffprob1<-tprob.all1-cprob.all1
		}

if(is.na(mean(diffprob1))==FALSE){
pred.all[,j]<-c(perdev[j],mean(diffprob1),quantile(diffprob1,.025),quantile(diffprob1,.975))
	}else{
		pred.all[,j] <- c(perdev[j], NA, NA, NA)
	}
}

pdf(file="./figs/figureSI4.pdf", height=6, width=6) 
plot(pred.all[1,],pred.all[2,], pch=16, col="white", cex=.7, xlab="Average neighborhood age", ylab="Coefficient on precolonial city", ylim=c(-0.1, 0.04), main="Influence of precolonial urban history\nby neighborhood age", cex.main = .9)
segments(pred.all[1,], pred.all[3,], pred.all[1,], pred.all[4,], col="gray72")
points(pred.all[1,],pred.all[2,], pch=16, col="dodgerblue2", cex=1.2)
rug(main2$average_age)
abline(h=0, col="firebrick")
dev.off()

########################################
#### Table SI.7: Neighborhood-level griddedness (O) and colonial characteristics 
########################################

fe1 <- lm(orientation_order ~ colrule + node_count + elev_median + elev_std + as.factor(city_fixed), data= main2)
summary(fe1, cluster=c("country"))

fe2 <- lm(orientation_order ~ (colsettler * colrule) + colsettler + colrule + node_count + elev_median + elev_std + as.factor(city_fixed), data= main2)
summary(fe2, cluster=c("country"))

fe3 <- lm(orientation_order ~ (capital * colrule) + capital + colrule + node_count + elev_median + elev_std + as.factor(city_fixed), data= main2)
summary(fe3, cluster=c("country"))

fe4 <- lm(orientation_order ~ colrule_britain + colrule_other + colrule_france + node_count + elev_median + elev_std + as.factor(city_fixed), data= main2)
summary(fe4, cluster=c("country"))

m1 <- lmer(orientation_order ~ colrule_france + colsettler + colrule  + node_count + elev_median + elev_std + city_elev_std + nat_capital_currently + ocean_coast + border_city + rain_mm + soil_quality_FAO + precolonial_major_city + (1 | city_fixed), data=main2[main2$colrule_france >= 0.5 | main2$colrule_britain >= 0.5 ,])
summary(m1)

round(summary(fe1, cluster=c("country"))$coefficients, digits=3)
length(fe1$residual)
round(summary(fe1, cluster=c("country"))$adj.r.squared, digits=3)

round(summary(fe2, cluster=c("country"))$coefficients, digits=3)
length(fe2$residual)
round(summary(fe2, cluster=c("country"))$adj.r.squared, digits=3)

round(summary(fe3, cluster=c("country"))$coefficients, digits=3)
length(fe3$residual)
round(summary(fe3, cluster=c("country"))$adj.r.squared, digits=3)

round(summary(fe4, cluster=c("country"))$coefficients, digits=3)
length(fe4$residual)
round(summary(fe4, cluster=c("country"))$adj.r.squared, digits=3)

round(summary(m1)$coefficients, digits=3)
length(summary(m1)$residuals)


########################################
#### Table SI.8: Robustness to controlling for history as settler colony 
########################################

lmer4 <- lmer(orientation_order ~ auto_med  + node_count + elev_median + elev_std + city_elev_std + nat_capital_historically + ocean_coast + border_city + rain_mm + soil_quality_FAO + precolonial_major_city + colonial_power_britain + colonial_power_france + colonial_power_none + settler_colony + (1 | city_fixed), data= sub1)
summary(lmer4)
round(summary(lmer4)$coefficients, digits=3)

lmer4a <- lmer(orientation_order ~ (auto_med * settler_colony ) + auto_med  + node_count + elev_median + elev_std + city_elev_std + nat_capital_historically + ocean_coast + border_city + rain_mm + soil_quality_FAO + precolonial_major_city + colonial_power_britain + colonial_power_france + colonial_power_none + settler_colony + (1 | city_fixed), data= sub1)
round(summary(lmer4a)$coefficients, digits=3)
summary(lmer4a)


########################################
#### Table SI.9: Controlling for foreign links and influences 
########################################

fe1 <- lm(orientation_order ~ casey_soviet + auto_med + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)
round(summary(fe1, cluster=c("country"))$coefficients, digits=3)
length(fe1 $residual)
round(summary(fe1, cluster=c("country"))$adj.r.squared, digits=3)

fe2 <- lm(orientation_order ~ (casey_soviet * auto_med) + casey_soviet + auto_med + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)
round(summary(fe2, cluster=c("country"))$coefficients, digits=3)
length(fe2 $residual)
round(summary(fe2, cluster=c("country"))$adj.r.squared, digits=3)

fe1 <- lm(orientation_order ~ casey_western_spons + auto_med + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)
round(summary(fe1, cluster=c("country"))$coefficients, digits=3)
length(fe1 $residual)
round(summary(fe1, cluster=c("country"))$adj.r.squared, digits=3)

fe2 <- lm(orientation_order ~ (casey_western_spons * auto_med) + casey_western_spons + auto_med + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)
round(summary(fe2, cluster=c("country"))$coefficients, digits=3)
length(fe2 $residual)
round(summary(fe2, cluster=c("country"))$adj.r.squared, digits=3)

fe7 <- lm(orientation_order ~ v2exl_legitideolcr_1 + auto_med + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)
round(summary(fe7, cluster=c("country"))$coefficients, digits=3)
length(fe7$residual)
round(summary(fe7, cluster=c("country"))$adj.r.squared, digits=3)

fe7a <- lm(orientation_order ~ (v2exl_legitideolcr_1 * auto_med) + v2exl_legitideolcr_1 + auto_med + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)
round(summary(fe7a, cluster=c("country"))$coefficients, digits=3)
length(fe7a$residual)
round(summary(fe7a, cluster=c("country"))$adj.r.squared, digits=3)

fe1 <- lm(orientation_order ~ china_fdi_above_median + auto_med + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)
round(summary(fe1, cluster=c("country"))$coefficients, digits=3)
length(fe1 $residual)
round(summary(fe1, cluster=c("country"))$adj.r.squared, digits=3)

fe2 <- lm(orientation_order ~ (china_fdi_above_median * auto_med) + china_fdi_above_median + auto_med + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)
round(summary(fe2, cluster=c("country"))$coefficients, digits=3)
length(fe2 $residual)
round(summary(fe2, cluster=c("country"))$adj.r.squared, digits=3)
		
fe1 <- lm(orientation_order ~ chi_urban_loan_5yrs + auto_med + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)
summary(fe1, cluster=c("country"))
round(summary(fe1, cluster=c("country"))$coefficients, digits=3)
length(fe1 $residual)
round(summary(fe1, cluster=c("country"))$adj.r.squared, digits=3)

fe2 <- lm(orientation_order ~ (chi_urban_loan_5yrs * auto_med) + chi_urban_loan_5yrs + auto_med + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)
round(summary(fe2, cluster=c("country"))$coefficients, digits=3)
length(fe2 $residual)
round(summary(fe2, cluster=c("country"))$adj.r.squared, digits=3)


########################################
#### Table SI.10: Testing implications of a “protest proofing” mechanism 
########################################

fe1 <- lm(orientation_order ~ urban_contention + average_age + auto_med + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)
round(summary(fe1, cluster=c("country"))$coefficients, digits=3)
length(fe1$residual)
round(summary(fe1, cluster=c("country"))$adj.r.squared, digits=3)

fe2 <- lm(orientation_order ~ (urban_contention * auto_med) + urban_contention  + average_age  + auto_med + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)
round(summary(fe2, cluster=c("country"))$coefficients, digits=3)
length(fe2$residual)
round(summary(fe2, cluster=c("country"))$adj.r.squared, digits=3)

fe3 <- lm(orientation_order ~ contention_past5  + auto_med + average_age  + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)
round(summary(fe3, cluster=c("country"))$coefficients, digits=3)
length(fe3$residual)
round(summary(fe3, cluster=c("country"))$adj.r.squared, digits=3)

fe4 <- lm(orientation_order ~ (contention_past5 * auto_med) + contention_past5  + auto_med + average_age  + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)
round(summary(fe4, cluster=c("country"))$coefficients, digits=3)
length(fe4$residual)
round(summary(fe4, cluster=c("country"))$adj.r.squared, digits=3)

fe5 <- lm(orientation_order ~ avg_dist_to_hq  + auto_med + average_age  + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)
round(summary(fe5, cluster=c("country"))$coefficients, digits=3)
length(fe5$residual)
round(summary(fe5, cluster=c("country"))$adj.r.squared, digits=3)

fe6 <- lm(orientation_order ~ (avg_dist_to_hq * auto_med) + avg_dist_to_hq  + auto_med + average_age + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)
round(summary(fe6, cluster=c("country"))$coefficients, digits=3)
length(fe6$residual)
round(summary(fe6, cluster=c("country"))$adj.r.squared, digits=3)

fe7 <- lm(avg_dist_to_boulevard ~ average_age + auto_med + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)
round(summary(fe7, cluster=c("country"))$coefficients, digits=3)
length(fe7$residual)
round(summary(fe7, cluster=c("country"))$adj.r.squared, digits=3)


########################################
#### Figures SI.5 and SI.6: Unpacking the fixed effects to select cases 
########################################

city_fixed <- c("abuja-combined", "accra-1910", "addis_ababa-5134", "arusha-4800", "bamako-1553", "beira-4521", "brikama-1458", "buchanan-1533", "bujumbura-4078", "cape_town-3268", "gaborone-3587", "gombe-2932", "ibadan-2189", "johannesburg-3673", "kampala-4427", "kara-2026", "khartoum-4335", "kigali-4172", "kinshasa-3209", "lagos-2125", "luanda-3050", "lubumbashi-3756", "malabo-2762", "manzini-4080", "maputo-4220", "maseru-3631", "monrovia-1526", "nairobi-4808", "nakuru-4729", "ndola-3859", "oyo-2191", "pointe_noire-2982", "port_elizabeth-3505", "porto_novo-2101", "touba-1473", "windhoek-3260")

city_names <-  c("abuja", "accra", "addis_ababa", "arusha", "bamako", "beira", "brikama", "buchanan", "bujumbura", "cape_town", "gaborone", "gombe", "ibadan", "johannesburg", "kampala", "kara", "khartoum", "kigali", "kinshasa", "lagos", "luanda", "lubumbashi", "malabo", "manzini", "maputo", "maseru", "monrovia", "nairobi", "nakuru", "ndola", "oyo", "pointe_noire", "port_elizabeth", "porto_novo", "touba", "windhoek")

fe4 <- lm(orientation_order ~ auto_med + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)
summary(fe4, cluster=c("country"))

#make a plot of the coefficients for each separate case regression: 
est <- summary(fe4, cluster=c("country"))$coefficients[2,1]
high95 <- summary(fe4, cluster=c("country"))$coefficients[2,1] + summary(fe4, cluster=c("city_fixed"))$coefficients[2,2]*qt(0.975, df= summary(fe4, cluster=c("country"))$df[2])
low95 <- summary(fe4, cluster=c("country"))$coefficients[2,1] - summary(fe4, cluster=c("city_fixed"))$coefficients[2,2]*qt(0.975, df= summary(fe4, cluster=c("country"))$df[2])
high90 <- summary(fe4, cluster=c("country"))$coefficients[2,1] + summary(fe4, cluster=c("city_fixed"))$coefficients[2,2]*qt(0.95, df= summary(fe4, cluster=c("country"))$df[2])
low90 <- summary(fe4, cluster=c("country"))$coefficients[2,1] - summary(fe4, cluster=c("city_fixed"))$coefficients[2,2]*qt(0.95, df= summary(fe4, cluster=c("country"))$df[2])


holder <- as.data.frame(matrix(NA, nrow=1, ncol=6))
colnames(holder) <- c("unit", "est", "high95", "low95", "high90", "low90")
holder[1, "unit"] <- "overall"
holder[1, "est"] <- est
holder[1, "high95"] <- high95
holder[1, "low95"] <- low95
holder[1, "high90"] <- high90
holder[1, "low90"] <- low90

for(i in 1:length(city_fixed)){

fe4 <- lm(orientation_order ~ auto_med + node_count + elev_median + elev_std, data= sub1[sub1$city_fixed == city_fixed[i],])
coefs <- as.matrix(summary(fe4)$coefficients)
if("auto_med" %in% rownames(coefs)){
holder2 <- as.data.frame(matrix(NA, nrow=1, ncol=6))
colnames(holder2) <- c("unit", "est", "high95", "low95", "high90", "low90")
holder2[1, "unit"] <- city_names[i]
holder2[1, "est"] <- summary(fe4)$coefficients[2,1]
holder2[1, "high95"] <- summary(fe4)$coefficients[2,1] + summary(fe4)$coefficients[2,2]*qt(0.975, df= summary(fe4)$df[2])
holder2[1, "low95"] <- summary(fe4)$coefficients[2,1] - summary(fe4)$coefficients[2,2]*qt(0.975, df= summary(fe4)$df[2])
holder2[1, "high90"] <- summary(fe4)$coefficients[2,1] + summary(fe4)$coefficients[2,2]*qt(0.95, df= summary(fe4)$df[2])
holder2[1, "low90"] <- summary(fe4)$coefficients[2,1] - summary(fe4)$coefficients[2,2]*qt(0.95, df= summary(fe4)$df[2])

holder <- rbind(holder, holder2)

} #else statement
}

holder <- holder[holder$unit!="buchanan",]

holder2 <- holder[-1,]
holder1 <- holder[1,]

holder2 <- holder2[order(holder2$est, decreasing=T),]

pdf(file="./figs/figureSI5.pdf", height=8, width=8) 
par(mar=c(5,7,4,3))

plot(c(holder1$est, holder2$est), nrow(holder):1, col="white", xlim=c(min(holder$low95) - 0.01, max(holder$high95) + 0.01), ylim=c(0.8, nrow(holder) +0.3), main="Estimate within each city", xlab="Coefficient for High Autocracy (0,1)", ylab="", yaxt="n")
abline(v=0, col="black", lty="dotted")
abline(v=holder$est[1], col="green")
segments(c(holder1$low95, holder2$low95), nrow(holder):1, c(holder1$high95, holder2$high95), nrow(holder):1, col="grey72", lwd=1.2)
segments(c(holder1$low90, holder2$low90), nrow(holder):1, c(holder1$high90, holder2$high90), nrow(holder):1, col="grey22", lwd=1)
points(c(holder1$est, holder2$est), nrow(holder):1, pch=16, col="dodgerblue2")
axis(2, at=c(nrow(holder):1), labels=c(holder1$unit, holder2$unit), las=1, cex.axis=0.75)

dev.off()


fe4 <- lm(orientation_order ~ auto_med + average_age + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)
summary(fe4, cluster=c("country"))

#make a plot of the coefficients for each separate case regression: 
est <- summary(fe4, cluster=c("country"))$coefficients[2,1]
high95 <- summary(fe4, cluster=c("country"))$coefficients[2,1] + summary(fe4, cluster=c("city_fixed"))$coefficients[2,2]*qt(0.975, df= summary(fe4, cluster=c("country"))$df[2])
low95 <- summary(fe4, cluster=c("country"))$coefficients[2,1] - summary(fe4, cluster=c("city_fixed"))$coefficients[2,2]*qt(0.975, df= summary(fe4, cluster=c("country"))$df[2])
high90 <- summary(fe4, cluster=c("country"))$coefficients[2,1] + summary(fe4, cluster=c("city_fixed"))$coefficients[2,2]*qt(0.95, df= summary(fe4, cluster=c("country"))$df[2])
low90 <- summary(fe4, cluster=c("country"))$coefficients[2,1] - summary(fe4, cluster=c("city_fixed"))$coefficients[2,2]*qt(0.95, df= summary(fe4, cluster=c("country"))$df[2])


holder <- as.data.frame(matrix(NA, nrow=1, ncol=6))
colnames(holder) <- c("unit", "est", "high95", "low95", "high90", "low90")
holder[1, "unit"] <- "overall"
holder[1, "est"] <- est
holder[1, "high95"] <- high95
holder[1, "low95"] <- low95
holder[1, "high90"] <- high90
holder[1, "low90"] <- low90

for(i in 1:length(city_fixed)){

fe4 <- lm(orientation_order ~ auto_med + average_age + node_count + elev_median + elev_std, data= sub1[sub1$city_fixed == city_fixed[i],])
coefs <- as.matrix(summary(fe4)$coefficients)
if("auto_med" %in% rownames(coefs)){
holder2 <- as.data.frame(matrix(NA, nrow=1, ncol=6))
colnames(holder2) <- c("unit", "est", "high95", "low95", "high90", "low90")
holder2[1, "unit"] <- city_names[i]
holder2[1, "est"] <- summary(fe4)$coefficients[2,1]
holder2[1, "high95"] <- summary(fe4)$coefficients[2,1] + summary(fe4)$coefficients[2,2]*qt(0.975, df= summary(fe4)$df[2])
holder2[1, "low95"] <- summary(fe4)$coefficients[2,1] - summary(fe4)$coefficients[2,2]*qt(0.975, df= summary(fe4)$df[2])
holder2[1, "high90"] <- summary(fe4)$coefficients[2,1] + summary(fe4)$coefficients[2,2]*qt(0.95, df= summary(fe4)$df[2])
holder2[1, "low90"] <- summary(fe4)$coefficients[2,1] - summary(fe4)$coefficients[2,2]*qt(0.95, df= summary(fe4)$df[2])

holder <- rbind(holder, holder2)

} #else statement
}

holder <- holder[holder$unit!="buchanan",]

holder2 <- holder[-1,]
holder1 <- holder[1,]

holder2 <- holder2[order(holder2$est, decreasing=T),]

pdf(file="./figs/figureSI6.pdf", height=8, width=8) 
par(mar=c(5,7,4,3))

plot(c(holder1$est, holder2$est), nrow(holder):1, col="white", xlim=c(min(holder$low95) - 0.01, max(holder$high95) + 0.01), ylim=c(0.8, nrow(holder) +0.3), main="Estimate within each city", xlab="Coefficient for High Autocracy (0,1)", ylab="", yaxt="n")
abline(v=0, col="black", lty="dotted")
abline(v=holder$est[1], col="green")
segments(c(holder1$low95, holder2$low95), nrow(holder):1, c(holder1$high95, holder2$high95), nrow(holder):1, col="grey72", lwd=1.2)
segments(c(holder1$low90, holder2$low90), nrow(holder):1, c(holder1$high90, holder2$high90), nrow(holder):1, col="grey22", lwd=1)
points(c(holder1$est, holder2$est), nrow(holder):1, pch=16, col="dodgerblue2")
axis(2, at=c(nrow(holder):1), labels=c(holder1$unit, holder2$unit), las=1, cex.axis=0.75)

dev.off()


########################################
#### Mean GDP per capita in a city-year with direct state housing construction is \$4,499.00 versus \$1,493.10 in a city-year with ``sites-and-services" construction.... The probability of state housing construction (of either type) also varies marginally with regime ideology. There is state housing construction in 44\% of city-years with regimes with above average V-Dem socialism scores in the sample (the mean V-Dem socialism score is 0.21 on a 0-1 scale). By contrast, there is state housing construction in 36\% of city-years with below average socialism scores. Note, however, that this difference may be driven entirely by the ANC's housing investments in post-Apartheid South Africa. Removing the three South African cities, there is only state housing construction in 33\% of city-years with above average regime socialism scores, effectively no different from the rate in non-socialist regimes.
########################################

td3 <- rbind(timedat.abuja[,c("city_fixed", "year", "v2exl_legitideolcr_1", "e_migdppc")], 
timedat.accra[,c("city_fixed", "year", "v2exl_legitideolcr_1", "e_migdppc")], 
timedat.addis_ababa[,c("city_fixed", "year", "v2exl_legitideolcr_1", "e_migdppc")], 
timedat.bamako[,c("city_fixed", "year", "v2exl_legitideolcr_1", "e_migdppc")], 
timedat.bujumbura[,c("city_fixed", "year", "v2exl_legitideolcr_1", "e_migdppc")], 
timedat.cape_town[,c("city_fixed", "year", "v2exl_legitideolcr_1", "e_migdppc")], 
timedat.gombe[,c("city_fixed", "year", "v2exl_legitideolcr_1", "e_migdppc")], 
timedat.ibadan[,c("city_fixed", "year", "v2exl_legitideolcr_1", "e_migdppc")], 
timedat.johannesburg[,c("city_fixed", "year", "v2exl_legitideolcr_1", "e_migdppc")], 
timedat.kampala[,c("city_fixed", "year", "v2exl_legitideolcr_1", "e_migdppc")], 
timedat.khartoum[,c("city_fixed", "year", "v2exl_legitideolcr_1", "e_migdppc")], 
timedat.kinshasa[,c("city_fixed", "year", "v2exl_legitideolcr_1", "e_migdppc")], 
timedat.lagos[,c("city_fixed", "year", "v2exl_legitideolcr_1", "e_migdppc")], 
timedat.luanda[,c("city_fixed", "year", "v2exl_legitideolcr_1", "e_migdppc")], 
timedat.lubumbashi[,c("city_fixed", "year", "v2exl_legitideolcr_1", "e_migdppc")], 
timedat.maseru[,c("city_fixed", "year", "v2exl_legitideolcr_1", "e_migdppc")], 
timedat.monrovia[,c("city_fixed", "year", "v2exl_legitideolcr_1", "e_migdppc")], 
timedat.ndola[,c("city_fixed", "year", "v2exl_legitideolcr_1", "e_migdppc")], 
timedat.oyo[,c("city_fixed", "year", "v2exl_legitideolcr_1", "e_migdppc")], 
timedat.pointe_noire[,c("city_fixed", "year", "v2exl_legitideolcr_1", "e_migdppc")], 
timedat.port_elizabeth[,c("city_fixed", "year", "v2exl_legitideolcr_1", "e_migdppc")], 
timedat.porto_novo[,c("city_fixed", "year", "v2exl_legitideolcr_1", "e_migdppc")] )

d1 <- housing[housing$state_housing_any==1,]


d1$e_migdppc
for(i in 1:nrow(d1)){
	d1$e_migdppc[i] <- td3$e_migdppc[td3$city_fixed == d1$city[i] & td3$year == d1$year[i]]
}

head(d1)
tail(d1)

table(d1$types)
        # builds          mixed sites_services 
           # 410             50             70
           
summary(d1$e_migdppc[d1$types=="builds"])
#   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#    673    1833    3666    4499    6591   12246       8 

summary(d1$e_migdppc[d1$types!="builds"])
#   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#  691.6  1078.8  1449.0  1493.1  1679.2  3707.3 

housing$v2exl_legitideolcr_1 <- NA
for(i in 1:nrow(housing)){
	housing$v2exl_legitideolcr_1[i] <- td3$v2exl_legitideolcr_1[td3$city_fixed == housing $city[i] & td3$year == housing $year[i]]
}


mean(housing $v2exl_legitideolcr_1)
#0.2115382

mean(housing $state_housing_any[housing $v2exl_legitideolcr_1 > 0.2115])
#0.4368071
mean(housing $state_housing_any[housing $v2exl_legitideolcr_1 < 0.2115])
#0.3553895

mean(housing $state_housing_any[housing $v2exl_legitideolcr_1 > 0.2115 & ! housing $city %in% c("cape_town-3268", "johannesburg-3673", "port_elizabeth-3505")])
#[1] 0.327027

